 # -*- coding: utf-8 -*-
import scrapy


class CsdnSpider(scrapy.Spider):
    """Crawl CSDN blog search results for ``keyword`` and yield the articles.

    The crawl has two levels: the search result listing (handled by
    ``parse``) and each linked article page (handled by ``parse2``).
    """

    name = 'csdn'
    # The search listing is requested from so.csdn.net while this spider was
    # originally restricted to blog.csdn.net only. Both hosts are listed so
    # that offsite filtering does not discard the initial search request.
    allowed_domains = ['so.csdn.net', 'blog.csdn.net']
    keyword = 'python进阶'
    # Number of search result pages to request, counting from page 1.
    max_pages = 1

    def start_requests(self):
        """Yield one search request per result page, from 1 to ``max_pages``."""
        # int() because a value overridden through spider arguments
        # (``-a max_pages=3``) is delivered as a string.
        last_page = int(self.max_pages)
        for pn in range(1, last_page + 1):
            url = 'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&viparticle=&domain=&o=&s=&u=&l=&f=&rbg=0' % (pn, self.keyword)
            yield scrapy.Request(
                url=url,
                callback=self.parse  # first-level page: the search listing
            )

    def parse(self, response):
        """Extract article links from a search listing and request each one."""
        hrefs = response.xpath('//div[@class="limit-width"]/a[1]/@href').extract()
        for href in hrefs:
            # A blank href would resolve to the listing page itself.
            if not href.strip():
                continue
            yield scrapy.Request(
                # urljoin leaves absolute URLs untouched and resolves relative
                # ones, which scrapy.Request would otherwise reject for
                # lacking a scheme.
                url=response.urljoin(href),
                callback=self.parse2  # second-level page: the article
            )

    def parse2(self, response):
        """Yield a dict with the article title and the raw response body.

        ``title`` is ``None`` when the heading XPath matches nothing;
        ``data`` is ``response.body`` as received.
        """
        item = dict(
            title=response.xpath('//h1[@class="title-article-box"]/text()').extract_first(),
            data=response.body
        )
        yield item
